'''
@Author: your name
@Date: 2020-03-25 11:38:24
@LastEditTime: 2020-03-25 16:30:36
@LastEditors: Please set LastEditors
@Description: In User Settings Edit
@FilePath: \giee\learn_python\爬虫\爬虫基础\练习项目\图文并茂网站.py
'''

from fake_useragent import UserAgent
import requests
from lxml import etree

def re_html(html):
    """Parse the chinanews article page.

    Args:
        html: page source as a decoded string.

    Returns:
        A 4-tuple (title, content_str, url_img, img_name):
        title       -- article headline (string from XPath string()),
        content_str -- all paragraph text concatenated into one string,
        url_img     -- list of image src URLs,
        img_name    -- list of image title attributes.
    """
    e = etree.HTML(html)
    title = e.xpath('string(//div/h1[@style])')
    all_p = e.xpath('//div[@class="left_zw"]//p')
    url_img = e.xpath('//div[@class="content"]//div[@style]/a/img/@src')
    img_name = e.xpath('//div[@class="content"]//div[@style]/a/img/@title')

    # string(.) flattens each <p> (including nested tags) to plain text.
    # BUGFIX: original wrote 'strign(.)', which raises XPathEvalError.
    content = [p.xpath('string(.)') for p in all_p]
    content_str = "".join(content)

    # BUGFIX: XPath string() returns a str-like result, not an Element,
    # so the original `title.text` raised AttributeError.
    print(title)

    return title, content_str, url_img, img_name

def get_html():
    """Fetch the target article page and return its HTML text.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    url = "https://www.chinanews.com/gj/2020/03-25/9136613.shtml"
    headers = {
        # Random desktop User-Agent to avoid trivial bot blocking.
        "User-Agent": UserAgent().random
    }

    # timeout added: requests has no default timeout and would hang
    # indefinitely on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    # Force UTF-8: without a charset header, requests defaults to
    # ISO-8859-1 for text/html, which garbles the Chinese text.
    response.encoding = "utf-8"

    return response.text


"""主函数"""
html = get_html()
re_output = re_html(html)
print("标题：{}".format(re_output[1])